We are going to look at NYC Restaurant Inspection Results.
library(tidyverse)
## -- Attaching packages --------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ purrr 0.3.4
## √ tibble 3.0.3 √ dplyr 1.0.2
## √ tidyr 1.1.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts ------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
##
## config
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Download every row behind a paged JSON endpoint, one request per page.
#
# Arguments:
#   url         Endpoint to query; it must accept the `$order`, `$limit` and
#               `$offset` query parameters.
#   chunk_size  Number of rows requested per page (default 50000).
#
# Returns a list of tibbles, one per page, in request order. Combine them
# with bind_rows().
#
# Stops with an error if any request comes back with an HTTP error status.
get_all_inspections = function(url, chunk_size = 50000L) {
  # Integers keep the query string free of scientific notation.
  chunk_size = as.integer(chunk_size)
  stopifnot(length(chunk_size) == 1, !is.na(chunk_size), chunk_size > 0)

  all_inspections = list()
  loop_index = 1L
  DO_NEXT = TRUE

  while (DO_NEXT) {
    message("Getting data, page ", loop_index)

    response = GET(
      url,
      query = list(
        # Many rows share a zipcode, so `:id` is added as a tiebreaker; without
        # a total order, rows can repeat or go missing between pages.
        `$order` = "zipcode,:id",
        `$limit` = chunk_size,
        `$offset` = (loop_index - 1L) * chunk_size
      )
    )

    # Fail loudly instead of parsing an error payload as if it were data.
    stop_for_status(response)

    all_inspections[[loop_index]] =
      response %>%
      content("text", encoding = "UTF-8") %>%
      fromJSON() %>%
      as_tibble()

    # A page shorter than chunk_size means there is nothing left to fetch.
    DO_NEXT = nrow(all_inspections[[loop_index]]) == chunk_size
    loop_index = loop_index + 1L
  }

  all_inspections
}
# JSON endpoint that is paged through by get_all_inspections().
url = "https://data.cityofnewyork.us/resource/43nn-pn8j.json"

# Fetch all pages and stack them into a single tibble.
nyc_inspections = bind_rows(get_all_inspections(url))
## Getting data, page 1
## Getting data, page 2
## Getting data, page 3
## Getting data, page 4
## Getting data, page 5
## Getting data, page 6
## Getting data, page 7
## Getting data, page 8
## Getting data, page 9
# Keep only the inspections dated 2019-02-21, restricted to the columns the
# plots below rely on.
tidy_inspections =
  nyc_inspections %>%
  filter(inspection_date == "2019-02-21T00:00:00.000") %>%
  select(
    phone,
    cuisine_description,
    inspection_date,
    score,
    latitude,
    longitude,
    building
  )
# Interactive scatter plot of restaurant locations, coloured by `building`,
# with cuisine, score and phone number shown on hover.
tidy_inspections %>%
  mutate(
    # Coordinates presumably arrive from the JSON API as strings, which plotly
    # would draw as categorical axes; as.numeric() is a no-op if they are
    # already numeric.
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude),
    text_label = str_c(
      "Cuisine: ", cuisine_description,
      "\nScore: ", score,
      "\nNumber: ", phone
    )
  ) %>%
  # Rows without coordinates cannot be placed on the plot; drop them explicitly.
  filter(!is.na(latitude), !is.na(longitude)) %>%
  plot_ly(
    # Longitude on x and latitude on y gives the usual map orientation.
    x = ~longitude, y = ~latitude, color = ~building, alpha = 0.5,
    text = ~text_label, type = "scatter", mode = "markers",
    # `building` has far more levels than the default brewer palette supports;
    # use the same viridis scale as the other plots.
    colors = "viridis"
  )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: Ignoring 3 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Tally the inspections per score (score treated as a factor), order the
# score levels by their tally, and draw one box trace per score.
tidy_inspections %>%
  count(score = as.factor(score)) %>%
  mutate(score = fct_reorder(score, n)) %>%
  plot_ly(
    x = ~score,
    y = ~n,
    color = ~score,
    type = "box",
    colors = "viridis"
  )
## Warning: Ignoring 1 observations
# Bar chart of the number of rows per `building` value; bar colour follows
# the count.
plot_ly(
  data = count(tidy_inspections, building),
  x = ~building,
  y = ~n,
  color = ~n,
  type = "bar",
  colors = "viridis"
)
## Warning: textfont.color doesn't (yet) support data arrays
## Warning: textfont.color doesn't (yet) support data arrays